library(data.table)
library(fixest)
library(multcomp)
library(pwr)
library(survey)

# set working directory to location where replication archive is stored
setwd("")

# read in data for Study 2 (vignette)
study2_responses <- fread("Study2_Final.csv", header = TRUE, stringsAsFactors = FALSE)

################################################################################
################################################################################

# MAIN PAPER

# FIGURE 2

# pre-post change in level of interest in vacationing in Florida
# among all respondents
interest_change <- lm(FL_interest_change ~ FL_treatment, data = study2_responses)
summary(interest_change)

# among Republicans and Democrats
study2_responses$pid <- relevel(as.factor(study2_responses$pid), ref = "R")
interest_change_pid <- lm(FL_interest_change ~ FL_treatment*pid, 
                          data = study2_responses[which(study2_responses$pid=="D"|study2_responses$pid=="R"),])
summary(interest_change_pid)
# because the CATE for Democrats is a combination of coefficients, we use the
# glht function from the multcomp package to conduct our hypothesis tests and
# obtain point estimates and 95% CIs
interest_change_R <- confint(glht(interest_change_pid, linfct = "FL_treatmentTreatment = 0"))
interest_change_D <- confint(glht(interest_change_pid, linfct = "FL_treatmentTreatment + FL_treatmentTreatment:pidD = 0"))

# interest in receiving more information about vacationing in Florida
# among all respondents
info_interest <- lm(FL_want_info ~ FL_treatment, data = study2_responses)
summary(info_interest)
# among Republicans and Democrats
info_interest_pid <- lm(FL_want_info ~ FL_treatment*pid, data = study2_responses[which(study2_responses$pid=="D"|study2_responses$pid=="R"),])
summary(info_interest_pid)
# because the CATE for Democrats is a combination of coefficients, we use the
# glht function from the multcomp package to conduct our hypothesis tests and
# obtain point estimates and 95% CIs
info_interest_R <- confint(glht(info_interest_pid, linfct = "FL_treatmentTreatment = 0"))
info_interest_D <- confint(glht(info_interest_pid, linfct = "FL_treatmentTreatment + FL_treatmentTreatment:pidD = 0"))

# plotting the ATE/CATE point estimates (first for all respondents, then among
# Democrats, and then among Republicans)
png(file = "florida_vignette.png", family = "sans", height = 7, width=12,
    res = 1200, units = "in")
par(mar=c(5.1,14,5,0.5))
plot(x=rev(c(interest_change$coefficients["FL_treatmentTreatment"],
             interest_change_D$confint[,"Estimate"],
             interest_change_R$confint[,"Estimate"],
             info_interest$coefficients["FL_treatmentTreatment"]+0.5,
             info_interest_D$confint[,"Estimate"]+0.5,
             info_interest_R$confint[,"Estimate"]+0.5)),
     y=rep(1:3,2),
     xlim=c(-.20,.70),
     xaxt="n",
     ylim=c(0.5, 3.5),
     pch=19,
     tck=-.02,
     cex.axis=0.9,
     cex=1.25,
     ylab="",
     yaxt="n",
     xlab="",
     axes = FALSE,
     panel.first = c(abline(v=0,lwd=2, col="gray90",lty=2),
                     abline(v=0.5,lwd=2, col="gray90",lty=2))
)
# plotting the 95% confidence intervals for the corresponding ATE/CATE
segments(x0=rev(c(confint(interest_change)["FL_treatmentTreatment",1],
                  interest_change_D$confint[,"lwr"],
                  interest_change_R$confint[,"lwr"],
                  confint(info_interest)["FL_treatmentTreatment",1]+0.5,
                  info_interest_D$confint[,"lwr"]+0.5,
                  info_interest_R$confint[,"lwr"]+0.5)),
         x1=rev(c(confint(interest_change)["FL_treatmentTreatment",2],
                  interest_change_D$confint[,"upr"],
                  interest_change_R$confint[,"upr"],
                  confint(info_interest)["FL_treatmentTreatment",2]+0.5,
                  info_interest_D$confint[,"upr"]+0.5,
                  info_interest_R$confint[,"upr"]+0.5)),
         y0=rep(1:3,2))
# creating x-axis labels to indicate substantive meaning of point estimates
axis(1,at=c(-.2, -.1, 0, .1, .2), 
     labels = c("-0.20", "-0.10", "0.00", "0.10", "0.20"), cex.axis = 1.5)
axis(1,at=c(.3, .4, .5, .6, .7), 
     labels = c("-0.20", "-0.10", "0.00", "0.10", "0.20"), cex.axis = 1.5)
mtext("Difference from Control", side = 1, at=0, line = 3, cex = 1.5)
mtext("Difference from Control", side = 1, at=0.5, line = 3, cex = 1.5)
# creating y-axis labels to indicate which point estimates correspond with which
# respondents
par(mgp=c(0,11,0))
axis(2,at=c(1:3),
     las=2,
     labels=rev(c("All Respondents",
                  "Democrats Only",
                  "Republicans Only")),
     tck=0,
     lwd = 0,
     line = 1,
     cex.axis=1.5, hadj=0)
# creating titles for each pane to indicate which outcome they correspond with
mtext("Change in Interest in\nVacationing in Florida", side = 3, at=0, line=0.5, cex = 2)
mtext("Probability of Wanting More\nInformation About Florida", side = 3, at=0.5, line=0.5, cex = 2)
# printing numerical values for each point estimate
text(rev(c(interest_change$coefficients["FL_treatmentTreatment"],
           interest_change_D$confint[,"Estimate"],
           interest_change_R$confint[,"Estimate"],
           info_interest$coefficients["FL_treatmentTreatment"]+0.5,
           info_interest_D$confint[,"Estimate"]+0.5,
           info_interest_R$confint[,"Estimate"]+0.5)),
     rep(1:3,2) + 0.1,
     paste0(sprintf("%.2f",rev(c(interest_change$coefficients["FL_treatmentTreatment"],
                                 interest_change_D$confint[,"Estimate"],
                                 interest_change_R$confint[,"Estimate"],
                                 info_interest$coefficients["FL_treatmentTreatment"],
                                 info_interest_D$confint[,"Estimate"],
                                 info_interest_R$confint[,"Estimate"]
     )),2)), cex=1.5)
dev.off()

################################################################################
################################################################################

# SUPPLEMENTAL INFORMATION

# TABLE SI.A2b

# gender
table(study2_responses$gender, useNA = "always")
round(prop.table(table(study2_responses$gender, useNA = "always")),3)*100
# age
table(study2_responses$age, useNA = "always")
round(prop.table(table(study2_responses$age, useNA = "always")),3)*100
# ethnicity/race
table(study2_responses$race_ethnicity, useNA = "always")
round(prop.table(table(study2_responses$race_ethnicity, useNA = "always")),3)*100
# education
table(study2_responses$education, useNA = "always")
round(prop.table(table(study2_responses$education, useNA = "always")),3)*100
# income
table(study2_responses$income, useNA = "always")
round(prop.table(table(study2_responses$income, useNA = "always")),3)*100
# party identification
table(study2_responses$pid, useNA = "always")
round(prop.table(table(study2_responses$pid, useNA = "always")),3)*100
# ideology
table(study2_responses$ideology, useNA = "always")
round(prop.table(table(study2_responses$ideology, useNA = "always")),3)*100

################################################################################

# In-text at the beginning of Section SI.B, we note that "The substantive 
# interpretation of our findings is consistent across both experiments when we use
# information about attention check passage to calculate complier average treatment
# effects."  We provide the code here to estimate complier average treatment effects
# for the analyses that underlie our main results (Figure 1/Table SI.3).

# We calculate the CACEs when respondents who passed at least one attention check
# are considered treated and when only respondents who passed both attention 
# checks are treated; these CACEs represent the lower and upper bounds of the
# true CACE (Gerber and Green 2012, pgs. 164-165).

# CACEs calculated using instrumental variables approach/two-stage least squares
# with feols

# Please note that the general substantive results from our main analysis--that
# Democrats express less interest in vacationing in Florida after being
# exposed to Florida's recent backsliding when compared to those same respondents'
# pre-treatment interest--persist.

# recoding treatment as binary indicator and incorporating attentiveness
study2_responses$treatment <- ifelse(study2_responses$FL_treatment=="Treatment", 1, 0)
study2_responses$treatment_att1 <- ifelse(study2_responses$FL_treatment=="Treatment" & study2_responses$attentiveness>=1, 1, 0)
study2_responses$treatment_att2 <- ifelse(study2_responses$FL_treatment=="Treatment" & study2_responses$attentiveness>=2, 1, 0)

# CACEs among all respondents

interest_change_model_iv_treatment_att1 <- feols(FL_interest_change ~ 1 | treatment_att1 ~ treatment, 
                                              cluster = "ResponseId", data = study2_responses)
summary(interest_change_model_iv_treatment_att1)
interest_change_model_iv_treatment_att2 <- feols(FL_interest_change ~ 1 | treatment_att2 ~ treatment, 
                                              cluster = "ResponseId", data = study2_responses)
summary(interest_change_model_iv_treatment_att2)

want_info_model_iv_treatment_att1 <- feols(FL_want_info ~ 1 | treatment_att1 ~ treatment, 
                                                 cluster = "ResponseId", data = study2_responses)
summary(want_info_model_iv_treatment_att1)
want_info_model_iv_treatment_att2 <- feols(FL_want_info ~ 1 | treatment_att2 ~ treatment, 
                                                 cluster = "ResponseId", data = study2_responses)
summary(want_info_model_iv_treatment_att2)

# CACEs among Democrats

interest_change_model_iv_treatment_att1 <- feols(FL_interest_change ~ 1 | treatment_att1 ~ treatment, 
                                                 cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="D"),])
summary(interest_change_model_iv_treatment_att1)
interest_change_model_iv_treatment_att2 <- feols(FL_interest_change ~ 1 | treatment_att2 ~ treatment, 
                                                 cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="D"),])
summary(interest_change_model_iv_treatment_att2)

want_info_model_iv_treatment_att1 <- feols(FL_want_info ~ 1 | treatment_att1 ~ treatment, 
                                           cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="D"),])
summary(want_info_model_iv_treatment_att1)
want_info_model_iv_treatment_att2 <- feols(FL_want_info ~ 1 | treatment_att2 ~ treatment, 
                                           cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="D"),])
summary(want_info_model_iv_treatment_att2)

# CACEs among Republicans

interest_change_model_iv_treatment_att1 <- feols(FL_interest_change ~ 1 | treatment_att1 ~ treatment, 
                                                 cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="R"),])
summary(interest_change_model_iv_treatment_att1)
interest_change_model_iv_treatment_att2 <- feols(FL_interest_change ~ 1 | treatment_att2 ~ treatment, 
                                                 cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="R"),])
summary(interest_change_model_iv_treatment_att2)

want_info_model_iv_treatment_att1 <- feols(FL_want_info ~ 1 | treatment_att1 ~ treatment, 
                                           cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="R"),])
summary(want_info_model_iv_treatment_att1)
want_info_model_iv_treatment_att2 <- feols(FL_want_info ~ 1 | treatment_att2 ~ treatment, 
                                           cluster = "ResponseId", data = study2_responses[which(study2_responses$pid=="R"),])
summary(want_info_model_iv_treatment_att2)
################################################################################

# in the text at the beginning of SI.B2, we discuss the power of our experiment
# with respect to the information seeking outcome; the code underlying that
# discussion is as follows:

# how many respondents are assigned to treatment and control
table(study2_responses$FL_treatment)

# power for difference in proportions tests
pwr.2p2n.test(n1 = 568, n2 = 598, sig.level = 0.05, power = 0.80)

# power for difference in means tests
pwr.t2n.test(n1 = 568, n2 = 598, sig.level = 0.05, power = 0.80)

# TABLE SI.6

# column 1
summary(interest_change)
# column 2
summary(interest_change_pid)

# TABLE SI.7

# column 1
summary(info_interest)
# column 2
summary(info_interest_pid)

# SURVEY WEIGHTING ROBUSTNESS CHECKS

# first need to create weights

# NOTE: all four demos used by CloudResearch for quota sampling were also
# recorded by the researchers with their own questions.  The demographic
# characteristics reported in Table SI.2 use the researchers' own questions.
# while the demos are recorded by CloudResearch (where available) are used
# to construct survey weights in order to weight according to CloudResearch's 
# original targets

# weights only calculated among respondents with non-missing values of relevant
# demos
respondent_demos_weights <- study2_responses[!(is.na(study2_responses$gender_cloudresearch) | 
                                                         is.na(study2_responses$age_cloudresearch) | 
                                                         is.na(study2_responses$race_cloudresearch) | 
                                                         is.na(study2_responses$ethnicity_cloudresearch) | 
                                                         is.na(study2_responses$income) | 
                                                         is.na(study2_responses$education) | 
                                                         study2_responses$education=="" |
                                                         is.na(study2_responses$pid) | 
                                                         is.na(study2_responses$ideology)),]

# the ACS bins income above $100k, so we need to do so with our income measure
# as well before using the survey package
respondent_demos_weights$income_acs <- ifelse(respondent_demos_weights$income=="$100,000-$199,999" |
                                                respondent_demos_weights$income=="$200,000 or more", 
                                                "$100,000 or more", respondent_demos_weights$income)

final_list_weights <- svydesign(ids=~1, data = respondent_demos_weights[which(!is.na(respondent_demos_weights$ResponseId)),])

# CloudResearch target weights on gender: 50% male, 50% female
gender.dist <- data.frame(gender_cloudresearch = c("0", "1"),
                          Freq = nrow(final_list_weights) * c(0.59, 0.50))

# CloudResearch target weights on age: 22% 18-29, 26% 30-44, 26.0% 45-59, 26% 60-99
age.dist <- data.frame(age_cloudresearch = c("1", "2", "3", "4"),
                       Freq = nrow(final_list_weights) * c(0.22, 0.26, 0.26, 0.26))

# CloudResearch target weights on race: 78.1% white, 13.9% black, 8.0% other
race.dist <- data.frame(race_cloudresearch = c("1", "2", "3"),
                           Freq = nrow(final_list_weights) * c(0.781, 0.139, 0.080))

# CloudResearch target weights on race: 84.1% not Hispanic, 15.9% Hispanic
eth.dist <- data.frame(ethnicity_cloudresearch = c("0", "1"),
                        Freq = nrow(final_list_weights) * c(0.841, 0.159))

# American Community Survey (2021, 1-year) estimates for education: 10.8% less 
# than HS education, 27.3% high school, 29.5% some college but no four-year degree,
# 20.2% four-year degree, 12.2% post-graduate degree
educ.dist <- data.frame(education = c("Some high school, or less", 
                                      "High school graduate or GED", 
                                      "Some college, no 4-year degree", 
                                      "College graduate", 
                                      "Post-graduate degree"),
                        Freq = nrow(final_list_weights) * c(0.108, 0.273, 0.295, 0.202, 0.122))

# American Community Survey (2021, 1-year) estimates for income: 11.7% less than $25k,
# 32.3% $25k-$49,999, 23.7% $50k to $74,999, 12.5% $75k to $99,999, 20.0% over $100k
income.dist <- data.frame(income_acs = c("Less than $25,000", "$25,000-$49,999", 
                                         "$50,000-$74,999", "$75,000-$99,999", 
                                         "$100,000 or more"),
                          Freq = nrow(final_list_weights) * c(0.117, 0.322, 0.237, 0.125, 0.200))

# ANES (2020) estimates for party ID (collapsing leaners into parties): 
# 45.7% Democrats, 40.6% Republicans, 10.5% Independents, 3.2% Other
pid.dist <- data.frame(pid = c("D", "R", "I", "O"),
                       Freq = nrow(final_list_weights) * c(0.457, 0.406, 0.105, 0.032))

# ANES (2020) estimates for ideology: 5.2% extreme liberal, 17.2% liberal, 13.0%
# slightly liberal, 25.8% moderate, 11.6% slightly conservative, 21.1% conservative,
# 6.1% extremely conservative
ideo.dist <- data.frame(ideology = c("1", "2", "3", "4", "5", "6", "7"),
                        Freq = nrow(final_list_weights) * c(0.052, 0.172, 0.130, 0.258, 0.116, 0.211, 0.061))

final_list_weights.rake <- rake(design = final_list_weights,
                                sample.margins = list(~gender_cloudresearch, ~age_cloudresearch,
                                                      ~race_cloudresearch, ~ethnicity_cloudresearch,
                                                      ~education, ~income_acs,
                                                      ~pid, ~ideology),
                                population.margins = list(gender.dist, age.dist,
                                                          race.dist, eth.dist,
                                                          educ.dist, income.dist,
                                                          pid.dist, ideo.dist))

# extracting respondent IDs and weights from rake object

survey_weights <- data.frame("respondent"=final_list_weights.rake$variables$ResponseId, 
                             "weight"=final_list_weights.rake$prob)

# merging weights back into original data frame

study2_responses <- merge(study2_responses, survey_weights, by.x="ResponseId", by.y = "respondent", all.x = TRUE)

# TABLE SI.8

# pre-post change in level of interest in vacationing in Florida
# among all respondents
interest_change_weighted <- lm(FL_interest_change ~ FL_treatment, data = study2_responses,
                                weights = weight)
summary(interest_change_weighted)

# among Republicans and Democrats
study2_responses$pid <- relevel(as.factor(study2_responses$pid), ref = "R")
interest_change_pid_weighted <- lm(FL_interest_change ~ FL_treatment*pid, 
                                   data = study2_responses[which(study2_responses$pid=="D"|study2_responses$pid=="R"),],
                                   weights = weight)
summary(interest_change_pid_weighted)
# because the CATE for Democrats is a combination of coefficients, we use the
# glht function from the multcomp package to conduct our hypothesis tests and
# obtain point estimates and 95% CIs
interest_change_R_weighted <- confint(glht(interest_change_pid_weighted, linfct = "FL_treatmentTreatment = 0"))
interest_change_D_weighted <- confint(glht(interest_change_pid_weighted, linfct = "FL_treatmentTreatment + FL_treatmentTreatment:pidD = 0"))
interest_change_R_weighted
interest_change_D_weighted

# interest in receiving more information about vacationing in Florida
# among all respondents
info_interest_weighted <- lm(FL_want_info ~ FL_treatment, data = study2_responses, weights = weight)
summary(info_interest_weighted)
# among Republicans and Democrats
info_interest_pid_weighted <- lm(FL_want_info ~ FL_treatment*pid, 
                        data = study2_responses[which(study2_responses$pid=="D"|study2_responses$pid=="R"),],
                        weights = weight)
summary(info_interest_pid_weighted)
# because the CATE for Democrats is a combination of coefficients, we use the
# glht function from the multcomp package to conduct our hypothesis tests and
# obtain point estimates and 95% CIs
info_interest_R_weighted <- confint(glht(info_interest_pid_weighted, linfct = "FL_treatmentTreatment = 0"))
info_interest_D_weighted <- confint(glht(info_interest_pid_weighted, linfct = "FL_treatmentTreatment + FL_treatmentTreatment:pidD = 0"))
info_interest_R_weighted
info_interest_D_weighted